Chapter 10 Gut microbiota: functional analysis
load("data/gut/data.Rdata")
sample_metadata <- sample_metadata %>%
filter(sample!="EHI02625")
sample_metadata$environment <- factor(sample_metadata$environment, levels=c("low", "high"))
treatment_colors <- c("#f56042","#429ef5")
genome_counts_filt <- genome_counts_filt %>%
select(-EHI02625)
genome_counts_filt <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts),]
rownames(genome_counts_filt) <- NULL#Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts[, !grepl("^S", colnames(genome_gifts))],GIFT_db) # Remove structure traits
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
select_if(~ !is.numeric(.) || sum(.) != 0)
#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered,GIFT_db)
#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)
#Get community-weighed average GIFTs per sample
genome_counts_row <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
column_to_rownames(., "genome")
#genome_counts_row <- rownames_to_column(genome_counts_row, "genome")
GIFTs_elements_community <- to.community(GIFTs_elements_filtered,genome_counts_row,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts_row,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts_row,GIFT_db)10.1 Metabolic capacity index (MCI)
GIFTs_functions_community %>%
rowMeans() %>%
as_tibble(., rownames = "sample") %>%
left_join(sample_metadata, by = "sample") %>%
group_by(environment) %>%
summarise(MCI = mean(value), sd = sd(value)) %>%
tt()| environment | MCI | sd |
|---|---|---|
| low | 0.2687300 | 0.01151867 |
| high | 0.2657253 | 0.01442070 |
MCI <- GIFTs_functions_community %>%
rowMeans() %>%
as_tibble(., rownames = "sample") %>%
left_join(sample_metadata, by = "sample")
shapiro.test(MCI$value) %>%
tidy()# A tibble: 1 × 3
statistic p.value method
<dbl> <dbl> <chr>
1 0.977 0.741 Shapiro-Wilk normality test
# A tibble: 1 × 4
statistic p.value method alternative
<dbl> <dbl> <chr> <chr>
1 136 0.345 Wilcoxon rank sum exact test two.sided
MCI %>%
ggplot(aes(x=environment, y=value, color = environment, fill=environment)) +
geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE) +
geom_jitter(width = 0.1, show.legend = TRUE) +
scale_color_manual(values=treatment_colors)+
scale_fill_manual(values=treatment_colors)+
theme_minimal() +
theme(
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())10.2 Wilcoxon
10.2.1 Community elements differences:
element_gift <- GIFTs_elements_community %>%
as.data.frame() %>%
rownames_to_column(., "sample") %>%
left_join(sample_metadata %>% select(sample,environment,river), by=join_by("sample"=="sample"))uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)
significant_elements <- element_gift %>%
pivot_longer(-c(sample,environment,river), names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
summarise(p_value = wilcox.test(value ~ environment)$p.value) %>%
mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
filter(p_adjust < 0.05)%>%
rownames_to_column(., "Elements") %>%
left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(trait == Code_element))
element_gift_t <- element_gift %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "trait")
element_gift_filt <- subset(element_gift_t, trait %in% significant_elements$trait) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "sample")%>%
left_join(sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))
element_gift_filt %>%
select(-c(sample,river))%>%
group_by(environment) %>%
summarise(across(everything(), mean))%>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Elements") %>%
left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(Elements == Code_element)) Elements low high Function
1 B0210 0.5209436000 0.4509265000 Amino acid biosynthesis_Leucine
2 B0215 0.4987544000 0.5656788000 Amino acid biosynthesis_Histidine
3 B0218 0.3182126000 0.2983654000 Amino acid biosynthesis_Tyrosine
4 B0219 0.0231044000 0.0186570700 Amino acid biosynthesis_GABA
5 B0220 0.0655121800 0.0480075800 Amino acid biosynthesis_Beta-alanine
6 B0307 0.3575178000 0.3030128000 Amino acid derivative biosynthesis_Spermidine
7 B0706 0.4328692000 0.5003124000 Vitamin biosynthesis_Biotin (B7)
8 B0710 0.0526786800 0.0786377400 Vitamin biosynthesis_Phylloquinone (K1)
9 B0711 0.2579474000 0.3005016000 Vitamin biosynthesis_Menaquinone (K2)
10 B1012 0.0149240150 0.0065831860 Antibiotic biosynthesis_Fosfomycin
11 D0101 0.0928441900 0.0506893400 Lipid degradation_Triglyceride
12 D0104 0.1351958300 0.0936400900 Lipid degradation_Dicarboxylic acids
13 D0504 0.0211782000 0.0540603300 Amino acid degradation_Methionine
14 D0516 0.0780710300 0.0469275900 Amino acid degradation_Beta-alanine
15 D0606 0.0426329300 0.0253378300 Nitrogen compound degradation_Allantoin
16 D0610 0.0046945340 0.0091389410 Nitrogen compound degradation_Methylamine
17 D0704 0.3817523000 0.3135096000 Alcohol degradation_Glycerol
18 D0709 0.0007728956 0.0000000000 Alcohol degradation_Polyvinyl alcohol
19 D0805 0.0029745460 0.0052878350 Xenobiotic degradation_Benzoate
20 D0812 0.0003603398 0.0007750193 Xenobiotic degradation_Naphthalene
21 D0816 0.1123401900 0.0953450600 Xenobiotic degradation_Phenylacetate
22 D0907 0.2604483000 0.2005187000 Antibiotic degradation_Tetracycline
23 D0910 0.3969376000 0.3309116000 Antibiotic degradation_Chloramphenicol
difference_table <- element_gift_filt %>%
select(-sample) %>%
group_by(environment) %>%
summarise(across(everything(), mean)) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Elements") %>%
left_join(.,uniqueGIFT_db[c(1,3,4)],by = join_by(Elements == Code_element)) %>%
arrange(Function) %>%
mutate(Difference=high-low)%>%
mutate(group_color = ifelse(Difference <0, "Low","High")) means_gift <- element_gift_filt %>%
select(-c(environment,river)) %>%
pivot_longer(!sample, names_to = "elements", values_to = "abundance") %>%
left_join(sample_metadata, by=join_by(sample==sample)) %>%
group_by(environment, elements) %>%
summarise(mean=mean(abundance))
log_fold <- means_gift %>%
group_by(elements) %>%
summarise(
logfc_high_low = log2(mean[environment == "high"] / mean[environment == "low"])
)element_gift_names <- element_gift_filt %>%
select(-c(environment,river)) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Elements") %>%
left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(Elements == Code_element))%>%
select(-Elements)%>%
select(Function, everything())%>%
t()%>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "sample")%>%
left_join(sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))
colNames <- names(element_gift_names %>% select(-c(sample,environment,river)))
for(i in colNames){
plt <- ggplot(element_gift_names, aes(x=environment, y=.data[[i]], color = environment, fill=environment)) +
geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE) +
geom_jitter(width = 0.1, show.legend = TRUE) +
scale_color_manual(values=treatment_colors)+
scale_fill_manual(values=treatment_colors) +
theme_minimal() +
theme(
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())
print(plt)
}difference_table %>%
ggplot(aes(x=forcats::fct_reorder(Function,Difference), y=Difference, fill=group_color)) +
geom_col() +
# geom_point(size=4) +
scale_fill_manual(values=treatment_colors) +
geom_hline(yintercept=0) +
coord_flip()+
theme(axis.text = element_text(size = 10),
axis.title = element_text(size = 12),
legend.position = "right",
panel.background = element_blank(),
panel.grid.major = element_line(size = 0.15, linetype = 'solid',
colour = "grey"))+
xlab("Microbial Functional Capacity") +
ylab("Mean difference")+
labs(fill="Elevation")uniqueGIFT <- unique(GIFT_db[c(2,3,4,5,6)])
code_function <- difference_table %>%
left_join(uniqueGIFT[c(1:3)], by=join_by(Elements==Code_element))
unique_codes<-unique(code_function$Code_function)
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
filter(Code_function %in% unique_codes)%>%
mutate(legend=str_c(Code_function," - ",Function))
code_function %>%
# mutate(Difference_abs = abs(Difference)) %>%
left_join(significant_elements, by=join_by(Elements==trait)) %>%
left_join(log_fold, by=join_by(Elements==elements)) %>%
left_join(gift_colors, by=join_by(Code_function==Code_function)) %>%
ggplot(., aes(x = logfc_high_low, y = -log(p_adjust), color=legend, size=abs(Difference))) +
geom_jitter(width = 0.2, height = 0.2)+
geom_vline(xintercept=0) +
scale_color_manual(values = gift_colors$Color)+
#xlim(c(-10,4)) +
theme_classic()+
labs(size="Mean difference (abs)", color="Functional trait")+
labs(x = "Log-fold change", y="-Log adjusted p-value") +
geom_text_repel(aes(label = Element), min.segment.length = 0.4, size=2.5, max.overlaps = Inf)10.2.2 Community functions differences
function_gift <- GIFTs_functions_community %>%
as.data.frame() %>%
rownames_to_column(., "sample") %>%
merge(., sample_metadata %>% select(sample,environment,river), by="sample")unique_funct_db<- GIFT_db[c(3,4,5)] %>%
distinct(Code_function, .keep_all = TRUE)
significant_functional <- function_gift %>%
pivot_longer(-c(sample,environment,river), names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
summarise(p_value = wilcox.test(value ~ environment)$p.value) %>%
mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
filter(p_adjust < 0.05)%>%
left_join(.,unique_funct_db[c(1,3)],by = join_by(trait == Code_function))function_gift_t <- function_gift %>%
select(-environment) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "trait")
function_gift_filt <- subset(function_gift_t, trait %in% significant_functional$trait) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "sample")%>%
left_join(., sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))
function_gift_filt %>%
select(-c(sample,river)) %>%
group_by(environment) %>%
summarise(across(everything(), mean))%>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Code_function") %>%
left_join(unique_funct_db[c(1,3)],by = join_by(Code_function == Code_function)) Code_function low high Function
1 D01 0.09715327 0.06631964 Lipid degradation
2 D07 0.23552990 0.19997740 Alcohol degradation
function_gift_names <- function_gift_filt%>%
select(-c(environment,river)) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Code_function") %>%
left_join(.,unique_funct_db[c(1,3)],by = join_by(Code_function == Code_function))%>%
select(-Code_function)%>%
select(Function, everything())%>%
t()%>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "sample")%>%
left_join(sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))
colNames <- names(function_gift_names)[2]
for(i in colNames){
plt <- ggplot(function_gift_names, aes(x=environment, y=.data[[i]], color = environment, fill=environment)) +
geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE) +
geom_jitter(width = 0.1, show.legend = TRUE) +
scale_color_manual(values=treatment_colors)+
scale_fill_manual(values=treatment_colors)+
theme_minimal() +
theme(
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())
print(plt)
}10.2.3 Community domains differences
domain_gift <- GIFTs_domains_community %>%
as.data.frame() %>%
rownames_to_column(., "sample") %>%
merge(sample_metadata %>% select(sample,environment,river), by="sample")unique_domain_db<- GIFT_db[c(4)] %>%
distinct(Domain, .keep_all = TRUE)
significant_domain <- domain_gift %>%
pivot_longer(-c(sample,environment,river), names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
summarise(p_value = wilcox.test(value ~ environment)$p.value) %>%
mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
filter(p_adjust < 0.05)domain_gift_t <- domain_gift %>%
select(-environment) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "trait")
domain_gift_filt <- subset(domain_gift_t, trait %in% significant_domain$trait) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "sample") %>%
left_join(sample_metadata %>% select(sample,environment,river), by = "sample")
domain_gift_filt %>%
select(-c(sample,river)) %>%
group_by(environment) %>%
summarise(across(everything(), mean))%>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Code_domain") Code_domain low high
1 Degradation 0.1839132 0.1748753
domain_gift_names <- domain_gift_filt%>%
select(-environment)%>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Code_domain") %>%
# select(-Code_domain)%>%
# select(domain, everything())%>%
t()%>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "sample")%>%
left_join(sample_metadata %>% select(sample,environment,river), by = join_by(sample == sample))
colNames <- names(domain_gift_names)[2:3]
for(i in colNames){
plt <- ggplot(domain_gift_names, aes(x=environment, y=.data[[i]], color = environment, fill=environment)) +
geom_boxplot(alpha = 0.2, outlier.shape = NA, width = 0.3, show.legend = FALSE) +
geom_jitter(width = 0.1, show.legend = TRUE) +
scale_color_manual(values=treatment_colors)+
scale_fill_manual(values=treatment_colors)+
theme_minimal() +
theme(
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank())
print(plt)
}